# R script for for replicating supplementary analysis in appendix A & B
# in 'When Parties Move to the Middle: The Role of Uncertainty'
# by J. Lindvall, D. Rueda, and H. Zhai
# this file written by: H Zhai (2022-11-03 [updated: -])
# on device: Mac Pro 13 Dual-Core Intel Core i5 2.3 GHz 

# PLEASE MAKE SURE ALL THE REPLICATION FILES (DATA AND SCRIPTS) ARE STORED AT THE SAME LEVEL IN THE SAME DIRECTORY
# OR MAKE SURE THE DIRECTORY-RELATED CODES ARE PROPERLY ADJUSTED 
# TO ENSURE THE CODES RUN WITHOUT DIRECTORY-RELATED PROBLEMS
# RESTART R SESSION BEFORE RUNNING

# This file replicates results from supplementary analysis reported in appendix sections A and B
# In the order they appear in text

# BEGIN SCRIPT
rm(list = ls())

# pkgs --------------------------------------------------------------------

if (!require("tidyverse")) install.packages("tidyverse")
if (!require("Hmisc")) install.packages("Hmisc")
if (!require("matrixStats")) install.packages("matrixStats")
if (!require("scales")) install.packages("scales")
if (!require("stringr")) install.packages("stringr")

# graphx ------------------------------------------------------------------

theme_base <- theme_get()
theme_set(theme_minimal(base_size = 14))

# Figure 1 ----------------------------------------------------------------

## study setup ----

### positions
p02 <- c(45, 55)
p05 <- c(25,45,50,55,75)
### votes
v02_s <- c(55,45)
v02_b <- c(50,50)
v05_s <- c(10,40,10,30,10)
v05_b <- c(10,35,10,35,10)
### moves
s <- 0:5
k <- length(s)
### parties
np2 <- 2
np5 <- 5
p2 <- c("ml", "mr")
p5 <- c("fl","ml","c","mr","r")

## define helpers ----
gmed <- function(x, f, out = NULL) { # function for group median measures
  low <- min(x) # lower boundary of distribution
  up <- max(x) # upper boundary of distribution
  mid <- zoo::rollmean(x, k = 2) # get interval boundaries 
  while (isTRUE(all.equal(max(mid), up))) {
    mid <- mid[1:length(mid)-1] # drop duplicate boundaries
  }
  breaks <- unique(c(low, mid, up)) # boundaries
  int <- cut(x, breaks = breaks, include.lowest = T) # cut into intervals
  int <- sapply(strsplit(gsub("\\[|\\]|\\(|\\)", "", int), ","), as.numeric) # trim & convert interval to numerical
  cf <- cumsum(f) # cumulative frequency
  midrow <- findInterval(max(cf)/2, cf) + 1 # median class
  l <- int[1, midrow] # lower class boundary of median class
  h <- diff(int[, midrow]) # width of median class
  f2 <- f[midrow] # frequency of median class
  cf2 <- cf[max(midrow - 1, 1)] # cumulative frequency up to median class
  n_2 <- max(cf)/2 # total frequency divided by 2
  # return output as required
  if (!is.null(out)) {
    if (out == "med") unname(l + (n_2 - cf2)/f2 * h) # return median
    else unname(f2 / (h*max(cf))) # return class density
  } 
} 
revscore <- function(x) {max(x) + min(x) - x} # function for reverse scores

## simulate DGP ----

### skewed distribution
#### 2-party
sims_2 <- tibble(pos = p02, vote = v02_s, party = p2, move = "base", step = 0) %>% # baseline
  add_case(pos = rep(p02, each=k), vote = c(v02_s[1]+5*s, v02_s[2]-5*s), 
           party = rep(p2, each=k), move = "vote", step = rep(s, 2)) %>% # +vote
  add_case(pos = c(p02[1]-4*s, p02[2]+0*s), vote = rep(v02_s, each=k), 
           party = rep(p2, each=k), move = "distance", step = rep(s, 2)) # +distance 
#### 5-party
sims_5 <- tibble(pos = p05, vote = v05_s, party = p5, move = "base", step = 0) %>% # baseline
  add_case(pos = rep(p05, each=k), vote = c(v05_s[1]-(5/4)*s, v05_s[2]+5*s, v05_s[3]-(5/4*s), v05_s[4]-(5/4*s), v05_s[5]-(5/4*s)), 
           party = rep(p5, each=k), move = "vote", step = rep(s, 5)) %>% # +vote
  add_case(pos = c(rep(p05[1], each=k), p05[2]-4*s, rep(p05[3:5], each=k)), vote = rep(v05_s, each=k), 
           party = rep(p5, each=k), move = "distance", step = rep(s, 5)) # +distance 

### bimodal
#### 2-party
simb_2 <- tibble(pos = p02, vote = v02_b, party = p2, move = "base", step = 0) %>% # baseline
  add_case(pos = rep(p02, each=k), vote = c(v02_b[1]+5*s, v02_b[2]-5*s), 
           party = rep(p2, each=k), move = "vote", step = rep(s, 2)) %>% # +vote
  add_case(pos = c(p02[1]-4*s, p02[2]+0*s), vote = rep(v02_b, each=k), 
           party = rep(p2, each=k), move = "distance", step = rep(s, 2)) # +distance 
#### 5-party
simb_5 <- tibble(pos = p05, vote = v05_b, party = p5, move = "base", step = 0) %>% # baseline
  add_case(pos = rep(p05, each=k), vote = c(v05_b[1]-(5/4)*s, v05_b[2]+5*s, v05_b[3]-(5/4*s), v05_b[4]-(5/4*s), v05_b[5]-(5/4*s)), 
           party = rep(p5, each=k), move = "vote", step = rep(s, 5)) %>% # +vote
  add_case(pos = c(rep(p05[1], each=k), p05[2]-4*s, rep(p05[3:5], each=k)), vote = rep(v05_b, each=k), 
           party = rep(p5, each=k), move = "distance", step = rep(s, 5)) # +distance 

## estimate uncertainty ----

### skewed
#### 2-party
ests_2 <- sims_2 %>% 
  group_by(move, step) %>% 
  arrange(pos) %>% 
  summarise(med = gmed(pos, vote, "med"), rtf = gmed(pos, vote, "den"),
            wsd = sqrt(wtd.var(pos, vote)), wmad = weightedMad(pos, vote), wiqr = unname(diff(wtd.quantile(pos, vote, c(0.25,0.75))))) %>%
  ungroup() %>% 
  mutate_at(vars(wsd:wiqr), revscore) %>% 
  mutate_at(vars(rtf:wiqr), scale) %>% 
  gather("measure", "value", rtf:wiqr)
#### 5-party
ests_5 <- sims_5 %>% 
  group_by(move, step) %>% 
  arrange(pos) %>% 
  summarise(med = gmed(pos, vote, "med"), rtf = gmed(pos, vote, "den"),
            wsd = sqrt(wtd.var(pos, vote)), wmad = weightedMad(pos, vote), wiqr = unname(diff(wtd.quantile(pos, vote, c(0.25,0.75))))) %>%
  ungroup() %>% 
  mutate_at(vars(wsd:wiqr), revscore) %>% 
  mutate_at(vars(rtf:wiqr), scale)%>% 
  gather("measure", "value", rtf:wiqr)

### bimodal
#### 2-party
estb_2 <- simb_2 %>% 
  group_by(move, step) %>% 
  arrange(pos) %>% 
  summarise(med = gmed(pos, vote, "med"), rtf = gmed(pos, vote, "den"),
            wsd = sqrt(wtd.var(pos, vote)), wmad = weightedMad(pos, vote), wiqr = unname(diff(wtd.quantile(pos, vote, c(0.25,0.75))))) %>%
  ungroup() %>% 
  mutate_at(vars(wsd:wiqr), revscore) %>% 
  mutate_at(vars(rtf:wiqr), scale)%>% 
  gather("measure", "value", rtf:wiqr)
#### 5-party
estb_5 <- simb_5 %>% 
  group_by(move, step) %>% 
  arrange(pos) %>% 
  summarise(med = gmed(pos, vote, "med"), rtf = gmed(pos, vote, "den"),
            wsd = sqrt(wtd.var(pos, vote)), wmad = weightedMad(pos, vote), wiqr = unname(diff(wtd.quantile(pos, vote, c(0.25,0.75))))) %>%
  ungroup() %>% 
  mutate_at(vars(wsd:wiqr), revscore) %>% 
  mutate_at(vars(rtf:wiqr), scale)%>% 
  gather("measure", "value", rtf:wiqr)

## Panel A: Bipartisan System ----

### graphx
facetlab <- c("+Distance", "+Vote") # facet labels
names(facetlab) <- c("distance", "vote") # facet label names 
measures <- c("RFD", "WIQR", "WMAD", "WSD") # legend labels

### make plot
ggplot(ests_2 %>% filter(move != "base")) +
  geom_point(aes(step, value, shape = measure, color = measure), size = 2) +
  geom_line(aes(step, value, lty = measure, color = measure)) +
  facet_grid(~move, labeller = labeller(move = facetlab)) +
  scale_shape_discrete(labels = measures) +
  scale_color_grey(start = 0.2, end = 0.8, labels = measures) +
  scale_linetype_discrete(labels = measures) +
  labs(title = "Skewed Distribution I: Bipartisan Case", x = "Simulation Round", y = "Estimated Certainty (Std)", 
       color = "Measure", shape = "Measure", linetype = "Measure")

## Panel B: Multiparty System ----
ggplot(ests_5 %>% filter(move != "base")) +
  geom_point(aes(step, value, shape = measure, color = measure), size = 2) +
  geom_line(aes(step, value, lty = measure, color = measure)) +
  facet_grid(~move, labeller = labeller(move = facetlab)) +
  scale_shape_discrete(labels = measures) +
  scale_color_grey(start = 0.2, end = 0.8, labels = measures) +
  scale_linetype_discrete(labels = measures) +
  labs(title = "Skewed Distribution II: Multiparty Case", x = "Simulation Round", y = "Estimated Certainty (Std)", 
       color = "Measure", shape = "Measure", linetype = "Measure") 

# Figure 2 ----------------------------------------------------------------

# inherits setup and results from Figure 1 

## Panel A: Bipartisan System ----
ggplot(estb_2 %>% filter(move != "base")) +
  geom_point(aes(step, value, shape = measure, color = measure), size = 2) +
  geom_line(aes(step, value, lty = measure, color = measure)) +
  facet_grid(~move, labeller = labeller(move = facetlab)) +
  scale_shape_discrete(labels = measures) +
  scale_color_grey(start = 0.2, end = 0.8, labels = measures) +
  scale_linetype_discrete(labels = measures) +
  labs(title = "Bimodal Distribution I: Bipartisan Case", x = "Simulation Round", y = "Estimated Certainty (Std)", 
       color = "Measure", shape = "Measure", linetype = "Measure") 

## Panel B: Bimodal System ----
ggplot(estb_5 %>% filter(move != "base")) +
  geom_point(aes(step, value, shape = measure, color = measure), size = 2) +
  geom_line(aes(step, value, lty = measure, color = measure)) +
  facet_grid(~move, labeller = labeller(move = facetlab)) +
  scale_shape_discrete(labels = measures) +
  scale_color_grey(start = 0.2, end = 0.8, labels = measures) +
  scale_linetype_discrete(labels = measures) +
  labs(title = "Bimodal Distribution II: Multiparty Case", x = "Simulation Round", y = "Estimated Certainty (Std)", 
       color = "Measure", shape = "Measure", linetype = "Measure") 

## clear env. ----
rm(list = setdiff(ls(),c("gmed","theme_base"))) # keep `gmed` for subsequent analysis

# Figure 3 ----------------------------------------------------------------

## study setup ----
### 3 parties
pt <- c("L", "C", "R")
### base position = {40, 50, 60}
p <- c(40, 50, 60)
### base vote = {40, 20, 40}
v <- c(40, 20, 40)
### baseline 
base <- tibble(pt, p, v) %>% arrange(p) %>% 
  mutate(pmed = gmed(p, v, out = "med"),
         dmed = (gmed(p, v, out = "den") %>% 
                   rescale(from=c(0,1), to=c(0,100))))
### base uncertainty
dmed0 <- unique(base$dmed)

## helpers ----
f_simp <- function(x) { 
  tx <- length(x)
  p <- c(
    p[pt=="L"] + x,
    rep(p[pt=="C"], tx),
    p[pt=="R"] - x
  )
  v <- rep(v, each = tx)
  pt <- rep(pt, each = tx)
  st <- rep(x, 3)
  df <- tibble(p, v, pt, st)
  df %>% group_by(st) %>% 
    arrange(p) %>% 
    mutate(pmed = gmed(p, v, out = "med"),
           dmed = (gmed(p, v, out = "den") %>% 
                     rescale(from=c(0,1), to=c(0,100)))
    ) %>% ungroup()
}
f_addbounds <- function(data) {
  p <- with(data, c(2*p[pt=="L"] - p[pt=="C"], 2*p[pt=="R"] - p[pt=="C"]))
  add_row(data, p, v = c(0,0))
} # add upper/lower bounds 


## simulate DGP ----
simp <- f_simp(-15:9)

## Panel A: Setup ----
ggplot(data = simp[simp$st==-15,], aes(x = p, y = v)) + # max divergence
  geom_bar(stat = "identity", width = 5, alpha = 0.7) + 
  geom_smooth(data = f_addbounds(simp[simp$st==-15,]), method = "loess", color = "black", size = 0.3) +
  geom_bar(data = base, stat = "identity", fill = NA, color = "darkgrey", lty = "dashed", width = 5) + # benchmack case
  geom_smooth(data = f_addbounds(base), method = "loess", color = "darkgrey", lty = "dashed", size = 0.3) +
  geom_text(aes(x = p, y = v/2, label = pt), size = 7) +
  xlim(0, 100) + ylim(0,60) + scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
  labs(title = "Ideological Change I: Divergence", x = "Ideology", y = "Vote Share (%)") 
ggplot(data = simp[simp$st==9,], aes(x = p, y = v)) + # max convergence
  geom_bar(stat = "identity", width = 2.5, alpha = 0.7) + 
  geom_smooth(data = f_addbounds(simp[simp$st==9,]), method = "loess", color = "black", size = 0.3) +
  geom_bar(data = base, stat = "identity", fill = NA, color = "darkgrey", lty = "dashed", width = 5) + # benchmack case
  geom_smooth(data = f_addbounds(base), method = "loess", color = "darkgrey", lty = "dashed", size = 0.3) +
  geom_text(aes(x = p, y = v/2, label = pt), size = 7) +
  xlim(0,100) + ylim(0,60) + scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
  labs(title = "Ideological Change II: Convergence", x = "Ideology", y = "Vote Share (%)") 

## Panel B: Result ----
ggplot(simp, aes(x = st, y = dmed)) +
  geom_point() + geom_line() +
  geom_hline(yintercept = dmed0, lty = "dashed") +
  scale_x_continuous(breaks = pretty_breaks(n=10)) +
  labs(title = "Median certainty under ideological change",
       subtitle = "Keeping vote share constant",
       x = "+/- Ideology (Left Party)", y = "Median Voter Certainty") 

# Figure 4 ----------------------------------------------------------------

# inherits setup from Figure 3
## helper ----
f_simv <- function(x) { # simulation function
  tx <- length(x)
  p <- rep(p, each = tx)
  v <- c(
    v[pt=="L"] - x/2,
    v[pt=="C"] + x,
    v[pt=="R"] - x/2
  )
  pt <- rep(pt, each = tx)
  st = rep(x, 3)
  df <- tibble(p, v, pt, st)
  df %>% group_by(st) %>% 
    mutate(pmed = gmed(p, v, out = "med"),
           dmed = (gmed(p, v, out = "den") %>% 
                     rescale(from = c(0,1), to = c(0,100)))
    ) %>% ungroup()
}

## simulate DGP ----
simv <- f_simv(-15:20)

## Panel A: Setup ----
ggplot(data = simv[simv$st==-15,], aes(x = p, y = v)) + # max divergence
  geom_bar(stat = "identity", width = 5, alpha = 0.7) + 
  geom_smooth(data = f_addbounds(simv[simv$st==-15,]), method = "loess", color = "black", size = 0.3) +
  geom_bar(data = base, stat = "identity", fill = NA, color = "darkgrey", lty = "dashed", width = 5) + # benchmack case
  geom_smooth(data = f_addbounds(base), method = "loess", color = "darkgrey", lty = "dashed", size = 0.3) +
  geom_text(aes(x = p, y = v/2, label = pt), size = 7) +
  xlim(20,80) + ylim(0,60) + scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
  labs(title = "Vote Share Change I: Bimodality", x = "Ideology", y = "Vote Share (%)") 
ggplot(data = simv[simv$st==20,], aes(x = p, y = v)) + # max convergence
  geom_bar(stat = "identity", width = 5, alpha = 0.7) + 
  geom_smooth(data = f_addbounds(simv[simv$st==20,]), method = "loess", color = "black", size = 0.3) +
  geom_bar(data = base, stat = "identity", fill = NA, color = "darkgrey", lty = "dashed", width = 5) + # benchmack case
  geom_smooth(data = f_addbounds(base), method = "loess", color = "darkgrey", lty = "dashed", size = 0.3) +
  geom_text(aes(x = p, y = v/2, label = pt), size = 7) +
  xlim(20,80) + ylim(0,60) + scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
  labs(title = "Vote Share Change II: Unimodality", x = "Ideology", y = "Vote Share (%)") 

## Panel B: Result ----
ggplot(simv, aes(st, dmed)) +
  geom_point() + geom_line() +
  geom_hline(yintercept = dmed0, lty = "dashed") +
  scale_x_continuous(breaks = pretty_breaks(n=10)) +
  labs(title = "Median certainty under vote share change",
       subtitle = "Keeping party positions constant",
       x = "+/- Vote Share (Center Party)", y = "Median Voter Certainty") 

# Figure 5 ----------------------------------------------------------------

# drawn directly by hand on https://app.diagrams.net/. code files (.drawio files) available from the authors upon request.
# no analysis involved. schematic illustration only.

# reset graphx ------------------------------------------------------------

theme_set(theme_base)

# clear env. --------------------------------------------------------------

rm(list = ls())

# END SCRIPT